Data of 10 cultivars of rice inoculated with B. glumae or mock inoculated. Discoloration of spikelets were recorded and presented as percentage.

library(tidyverse)
library(readxl)
library(ggplot2)
rice_data <- read_excel("Spray-Data-06.18.20.xlsx", 
                        col_types = c("text", "numeric", "numeric",
                                      "numeric", "numeric","numeric"))
rice_data
## # A tibble: 320 x 6
##    Genotype   Rep `Mock_30C-22C` `Mock_30C-28C` `Pathogen_30C-22C`
##    <chr>    <dbl>          <dbl>          <dbl>              <dbl>
##  1 310111       1           5.88           18.6               86.5
##  2 310111       2           2.48           79.7               82.6
##  3 310111       3           2.9            61.7              100  
##  4 310111       4           0              67.2               86.1
##  5 310111       5           1.76           75.4               98.6
##  6 310111       6           0              76.6               93.8
##  7 310111       7          NA              NA                 85.7
##  8 310111       8          NA              NA                100  
##  9 310111       9          NA              NA                100  
## 10 310111      10          NA              NA                100  
## # … with 310 more rows, and 1 more variable: Pathogen_30C-28C <dbl>

We still have to “reshape” the table to make it in longer format coding a column for treatment (Mock vs Inoculated) and temperature profile (30-22 vs 30-28).

rice_data_long <- rice_data %>% 
  pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C", 
                        "Pathogen_30C-22C", "Pathogen_30C-28C"),
               names_to = "Inoculation", 
               values_to = "DiscPerc") %>%
  separate(col = Inoculation, 
            sep = "_",
            into = c("Inoculation", "TempProfile")) %>% 
  unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE)

#kableExtra::kable(rice_data_long, format = "markdown")

Data Exploration

Separating mock from pathogen inoculated:

ggplot(data = rice_data_long, aes(x = Genotype, y = DiscPerc)) +
  geom_boxplot(aes(fill = TempProfile)) +
  facet_grid(. ~ Inoculation) +
  coord_flip()

Looking at genotype effect:

ggplot(data = rice_data_long, aes(x = Inoc_Temp, y = DiscPerc)) +
  geom_boxplot(aes(fill = TempProfile)) +
  facet_wrap(Genotype ~ ., ncol = 5) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Clustering analysis

Since we are dealing with continuous data on four different conditions with need to scale them to estimate their relationships.

Kmeans

library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(RColorBrewer)
#Need to remove NAs and calculate means
rice_data_NoNAs <- rice_data_long %>% 
  drop_na() %>%
  group_by(Genotype, Inoc_Temp) %>%
  summarise(meanDiscPerc = mean(DiscPerc))
## `summarise()` has grouped output by 'Genotype'. You can override using the `.groups` argument.
rice_matrix <- rice_data_NoNAs %>% 
  pivot_wider(names_from = "Inoc_Temp", 
              values_from = "meanDiscPerc") %>%
  column_to_rownames(var = "Genotype")

# rice_matrix_all <- rice_data_long %>%
#   drop_na() %>%
#   unite(gen_rep, Genotype, Rep, sep = "_", remove = FALSE) %>%
#   select(-c(Genotype, Rep, Inoculation, TempProfile)) %>%
#   pivot_wider(names_from = "Inoc_Temp",
#               values_from = "DiscPerc") %>%
#   column_to_rownames(var = "gen_rep")
# 
# rice_matrix_all <- na.omit(rice_matrix_all)
set.seed(123)
(cluster_number <- NbClust::NbClust(rice_matrix, distance = "euclidean", min.nc = 2, 
                                   max.nc = 10, method = "complete", index = "all"))

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 7 proposed 2 as the best number of clusters 
## * 4 proposed 3 as the best number of clusters 
## * 4 proposed 4 as the best number of clusters 
## * 4 proposed 6 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************
## $All.index
##        KL      CH Hartigan    CCC    Scott      Marriot     TrCovW    TraceW
## 2  3.9385 13.8661   5.5512 5.2116  85.2057 1.591798e+14 22133403.7 14634.711
## 3  0.2258 11.1887  11.1453 3.4223  98.4780 1.844441e+14 11053988.0 11185.181
## 4  2.0819 15.1195   6.5969 3.8585 135.1775 5.233860e+13  4393128.3  6755.953
## 5  1.4876 16.5602   4.9639 3.8478 156.1719 2.862559e+13  2267950.4  4783.641
## 6  2.2243 17.3834   2.7395 3.7478 180.1473 1.243078e+13  1085700.9  3594.217
## 7  1.1773 16.5075   2.2867 3.2558 190.5733 1.004601e+13   622048.3  3006.009
## 8  0.4748 15.6599   4.1223 2.7494 204.5454 6.524977e+12   589494.1  2556.347
## 9  1.1075 17.3477   4.2726 2.8759 237.6249 1.579688e+12   378782.3  1902.716
## 10 1.2951 19.8949   3.8895 3.1342 270.4243 3.783177e+11   221021.6  1370.420
##    Friedman    Rubin Cindex     DB Silhouette   Duda Pseudot2   Beale Ratkowsky
## 2   50.4561  11.4207 0.4411 1.1281     0.3685 0.5395   4.2679  1.7173    0.3919
## 3   61.0212  14.9428 0.4636 1.0575     0.3434 0.3800  17.9471  3.6107    0.3816
## 4  146.6309  24.7394 0.3546 0.9320     0.3988 2.8395  -0.6478 -0.7820    0.3698
## 5  157.4737  34.9395 0.4690 0.6674     0.4680 2.3880  -1.1625 -0.9355    0.3607
## 6  207.8711  46.5019 0.6245 0.5848     0.5087 0.5716   2.9981  1.4476    0.3522
## 7  249.1857  55.6013 0.6516 0.6346     0.4230 2.8554  -0.6498 -0.7844    0.3292
## 8  294.0802  65.3816 0.6496 0.5567     0.4146 0.5129   4.7494  1.9110    0.3133
## 9  674.2626  87.8418 0.6140 0.5811     0.4303 0.6171   1.2411  0.9987    0.2986
## 10 809.6922 121.9611 0.6647 0.4971     0.5036 0.4045   4.4174  2.6661    0.2947
##         Ball Ptbiserial    Frey McClain   Dunn Hubert SDindex  Dindex   SDbw
## 2  7317.3553     0.5896  0.2182  0.5312 0.3345  1e-04  0.1112 25.7098 1.6974
## 3  3728.3936     0.6384  0.6668  0.6359 0.3834  1e-04  0.1016 22.6733 0.5771
## 4  1688.9882     0.5986 -0.0023  1.4183 0.4140  1e-04  0.0944 16.9674 0.3800
## 5   956.7282     0.6168  0.1659  1.4149 0.5259  1e-04  0.0734 14.1098 0.1527
## 6   599.0361     0.6241  1.9687  1.4570 0.7451  1e-04  0.0659 12.1846 0.1018
## 7   429.4299     0.5600  0.6022  1.9014 0.3917  1e-04  0.1074 11.0044 0.0857
## 8   319.5433     0.5490  0.7893  2.0137 0.4009  1e-04  0.1109  9.8531 0.0645
## 9   211.4129     0.4684  0.2241  2.9606 0.4261  1e-04  0.1099  8.3622 0.0520
## 10  137.0420     0.4538  0.4391  3.2129 0.5126  1e-04  0.1060  6.9541 0.0406
## 
## $All.CriticalValues
##    CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2          0.0772            59.7978       0.1857
## 3          0.2805            28.2219       0.0125
## 4         -0.3257            -4.0703       1.0000
## 5         -0.1694           -13.8056       1.0000
## 6          0.0160           246.4030       0.2641
## 7         -0.3257            -4.0703       1.0000
## 8          0.0772            59.7978       0.1479
## 9         -0.1694           -13.8056       0.4615
## 10        -0.0628           -50.8045       0.0842
## 
## $Best.nc
##                     KL      CH Hartigan    CCC   Scott      Marriot   TrCovW
## Number_clusters 2.0000 10.0000    3.000 2.0000  4.0000 4.000000e+00        3
## Value_Index     3.9385 19.8949    5.594 5.2116 36.6995 1.083924e+14 11079416
##                   TraceW Friedman   Rubin Cindex      DB Silhouette   Duda
## Number_clusters    4.000   9.0000  6.0000 4.0000 10.0000     6.0000 2.0000
## Value_Index     2456.916 380.1823 -2.4631 0.3546  0.4971     0.5087 0.5395
##                 PseudoT2  Beale Ratkowsky     Ball PtBiserial Frey McClain
## Number_clusters   2.0000 2.0000    2.0000    3.000     3.0000    1  2.0000
## Value_Index       4.2679 1.7173    0.3919 3588.962     0.6384   NA  0.5312
##                   Dunn Hubert SDindex Dindex    SDbw
## Number_clusters 6.0000      0  6.0000      0 10.0000
## Value_Index     0.7451      0  0.0659      0  0.0406
## 
## $Best.partition
## 301161 310111 310131 310219 310301 310354 310645 310747 310802 310998 311078 
##      1      2      1      2      2      1      1      1      1      1      2 
## 311151 311206 311383 311385 311600 311642 311677 311735 311795 
##      1      1      2      1      1      1      2      1      2
#K-means
set.seed(123)
km.res4 <- kmeans(scale(rice_matrix), 4, nstart = 10)

fviz_cluster(km.res4, data = rice_matrix, labelsize = 10)

#Data Clusters
#rice_Kcluster <- cbind(rice_data_NoNAs, km.res3$cluster)

rice_Kcluster_data <- drop_na(rice_data) %>%
  pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C", 
                        "Pathogen_30C-22C", "Pathogen_30C-28C"),
               names_to = "Inoculation", 
               values_to = "DiscPerc") %>%
  separate(col = Inoculation, 
            sep = "_",
            into = c("Inoculation", "TempProfile")) %>% 
  unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE) %>%
  left_join(as_tibble(km.res4$cluster, rownames="Genotype"), 
            by = "Genotype") %>%
  rename(cluster = "value") %>%
  mutate(response = recode(cluster, '1'='Temperature dependent', 
                                    '2'='Temperature dependent', 
                                    '3'='Temperature independent', 
                                    '4'='Temperature independent')) %>%
  mutate(response_dis = recode(cluster, '1'='Disease severity (>50%)', 
                                    '2'='Disease severity (<50%)', 
                                    '3'='Disease severity (>50%)', 
                                    '4'='Disease severity (<50%)'))
  

ggplot(rice_Kcluster_data, aes(x = as.factor(cluster), y = DiscPerc)) + 
  geom_boxplot(aes(fill = Inoc_Temp)) +
  facet_wrap(vars(response, response_dis),nrow = 1, scales = "free_x") +
  scale_fill_manual(values = alpha(c("#80cdc1","#01665e","#dfc27d","#8c510a"), .7)) +
  labs(title = "K-means clustering", x="Cluster", 
       y="Discoloration Percent (%)", 
       fill="Inoculation & \nTemperature profile")

(Kmeans_summary <- rice_Kcluster_data %>%
  group_by(Genotype, Inoc_Temp, response, response_dis, cluster) %>%
  summarise(meanDiscPerc = mean(DiscPerc)) %>%
  pivot_wider(names_from = Inoc_Temp,
              values_from = meanDiscPerc) %>%
  arrange(cluster)
)
## `summarise()` has grouped output by 'Genotype', 'Inoc_Temp', 'response', 'response_dis'. You can override using the `.groups` argument.
## # A tibble: 20 x 8
## # Groups:   Genotype, response, response_dis [20]
##    Genotype response       response_dis    cluster `Mock_30C-22C` `Mock_30C-28C`
##    <chr>    <chr>          <chr>             <int>          <dbl>          <dbl>
##  1 310111   Temperature d… Disease severi…       1           2.17          63.2 
##  2 311078   Temperature d… Disease severi…       1           3.49          55.2 
##  3 311677   Temperature d… Disease severi…       1          12.4           41.3 
##  4 310219   Temperature d… Disease severi…       2           6.56          18.9 
##  5 310301   Temperature d… Disease severi…       2           4.96          14.4 
##  6 310354   Temperature d… Disease severi…       2           3.97           4.42
##  7 310645   Temperature d… Disease severi…       2           7.79           5.22
##  8 310747   Temperature d… Disease severi…       2           9.88           3.57
##  9 310998   Temperature d… Disease severi…       2           8.29          11.4 
## 10 311600   Temperature d… Disease severi…       2          15.3           11.1 
## 11 311642   Temperature d… Disease severi…       2           2.97           3.96
## 12 311735   Temperature d… Disease severi…       2           9.60          13.2 
## 13 311383   Temperature i… Disease severi…       3          19.9            8.06
## 14 311795   Temperature i… Disease severi…       3          28.1           32.6 
## 15 301161   Temperature i… Disease severi…       4          12.6           22.9 
## 16 310131   Temperature i… Disease severi…       4           3.01           3.82
## 17 310802   Temperature i… Disease severi…       4          10.6            9.19
## 18 311151   Temperature i… Disease severi…       4           3.94           3.91
## 19 311206   Temperature i… Disease severi…       4           1.76           5.40
## 20 311385   Temperature i… Disease severi…       4           1.63           5.48
## # … with 2 more variables: Pathogen_30C-22C <dbl>, Pathogen_30C-28C <dbl>

Hierarchical clustering

#Clustering
rice_hc <- hcut(rice_matrix, 4,stand = T, hc_method = "ward.D", hc_metric = "euclidean")

#checking clusters
fviz_cluster(rice_hc)

fviz_silhouette(rice_hc, print.summary = F)

#Graphical view
p <- fviz_dend(rice_hc, rect = T, cex=0.8, horiz = T, repel = TRUE, color_labels_by_k = T, 
                k_colors = "jco", rect_fill = T, rect_border = "jco")
(p2 <- p + annotate(x=-2, xend=-2, y=0, yend=12, colour="dark gray", lwd=0.5, geom="segment"))

#Data Clusters
rice_cluster <- cbind(rice_matrix, rice_hc$cluster)

rice_cluster_data <- rice_cluster %>%
  rownames_to_column(var = "Genotype") %>%
    pivot_longer(cols = c("Mock_30C-22C", "Mock_30C-28C", 
                        "Pathogen_30C-22C", "Pathogen_30C-28C"),
               names_to = "Inoculation", 
               values_to = "DiscPerc") %>%
  separate(col = Inoculation, 
            sep = "_",
            into = c("Inoculation", "TempProfile")) %>% 
  unite("Inoc_Temp", Inoculation:TempProfile, remove = FALSE) %>%
  dplyr::rename(cluster = "rice_hc$cluster") %>%
  mutate(response = recode(cluster, '1'='Temperature independent', 
                                    '2'='Temperature dependent', 
                                    '3'='Temperature independent', 
                                    '4'='Temperature dependent')) %>%
  mutate(response_dis = recode(cluster, '1'='Disease severity (<50%)', 
                                    '2'='Disease severity (>50%)', 
                                    '3'='Disease severity (>50%)', 
                                    '4'='Disease severity (<50%)'))

ggplot(rice_cluster_data, aes(x = as.factor(cluster), y = DiscPerc)) +   geom_boxplot(aes(fill = Inoc_Temp)) +
  facet_wrap(vars(response, response_dis),nrow = 1, scales = "free_x") +
  scale_fill_manual(values = alpha(c("#80cdc1","#01665e","#dfc27d","#8c510a"), .7)) +
  labs(x= "Cluster", y = "% Discolored spikelets", fill="Treatment") +
  theme(text = element_text(size=12))

(hc_summary <- rice_cluster_data %>%
  group_by(Genotype, Inoc_Temp, response, response_dis, cluster) %>%
  summarise(meanDiscPerc = mean(DiscPerc)) %>%
  pivot_wider(names_from = Inoc_Temp,
              values_from = meanDiscPerc) %>%
  arrange(cluster)
)
## `summarise()` has grouped output by 'Genotype', 'Inoc_Temp', 'response', 'response_dis'. You can override using the `.groups` argument.
## # A tibble: 20 x 8
## # Groups:   Genotype, response, response_dis [20]
##    Genotype response       response_dis    cluster `Mock_30C-22C` `Mock_30C-28C`
##    <chr>    <chr>          <chr>             <int>          <dbl>          <dbl>
##  1 301161   Temperature i… Disease severi…       1          17.8           22.9 
##  2 310131   Temperature i… Disease severi…       1           3.01           4.10
##  3 310219   Temperature i… Disease severi…       1          12.7           18.9 
##  4 310802   Temperature i… Disease severi…       1          10.6            9.19
##  5 311151   Temperature i… Disease severi…       1           3.94           3.91
##  6 311206   Temperature i… Disease severi…       1           1.76           5.40
##  7 311385   Temperature i… Disease severi…       1           1.63           5.48
##  8 311600   Temperature i… Disease severi…       1          15.3           11.1 
##  9 310111   Temperature d… Disease severi…       2           2.17          63.2 
## 10 311078   Temperature d… Disease severi…       2           2.75          55.2 
## 11 311677   Temperature d… Disease severi…       2          12.4           45.7 
## 12 310301   Temperature i… Disease severi…       3           4.96          13.7 
## 13 311383   Temperature i… Disease severi…       3          19.9            8.06
## 14 311795   Temperature i… Disease severi…       3          28.1           32.6 
## 15 310354   Temperature d… Disease severi…       4           4.24           4.42
## 16 310645   Temperature d… Disease severi…       4           7.79           5.22
## 17 310747   Temperature d… Disease severi…       4           6.73           3.57
## 18 310998   Temperature d… Disease severi…       4           8.29          11.4 
## 19 311642   Temperature d… Disease severi…       4           2.97           3.96
## 20 311735   Temperature d… Disease severi…       4           5.46          13.2 
## # … with 2 more variables: Pathogen_30C-22C <dbl>, Pathogen_30C-28C <dbl>

Comparing hierarchical clusters and kmean clustering

(Comparison_genotypes <- full_join(Kmeans_summary, hc_summary, by = "Genotype") %>%
  rename_with(~gsub("\\.x", "\\.kmeans", .x)) %>%
  rename_with(~gsub("\\.y", "\\.hc", .x))
)
## # A tibble: 20 x 15
## # Groups:   Genotype [20]
##    Genotype response.kmeans   response_dis.kme… cluster.kmeans `Mock_30C-22C.km…
##    <chr>    <chr>             <chr>                      <int>             <dbl>
##  1 310111   Temperature depe… Disease severity…              1              2.17
##  2 311078   Temperature depe… Disease severity…              1              3.49
##  3 311677   Temperature depe… Disease severity…              1             12.4 
##  4 310219   Temperature depe… Disease severity…              2              6.56
##  5 310301   Temperature depe… Disease severity…              2              4.96
##  6 310354   Temperature depe… Disease severity…              2              3.97
##  7 310645   Temperature depe… Disease severity…              2              7.79
##  8 310747   Temperature depe… Disease severity…              2              9.88
##  9 310998   Temperature depe… Disease severity…              2              8.29
## 10 311600   Temperature depe… Disease severity…              2             15.3 
## 11 311642   Temperature depe… Disease severity…              2              2.97
## 12 311735   Temperature depe… Disease severity…              2              9.60
## 13 311383   Temperature inde… Disease severity…              3             19.9 
## 14 311795   Temperature inde… Disease severity…              3             28.1 
## 15 301161   Temperature inde… Disease severity…              4             12.6 
## 16 310131   Temperature inde… Disease severity…              4              3.01
## 17 310802   Temperature inde… Disease severity…              4             10.6 
## 18 311151   Temperature inde… Disease severity…              4              3.94
## 19 311206   Temperature inde… Disease severity…              4              1.76
## 20 311385   Temperature inde… Disease severity…              4              1.63
## # … with 10 more variables: Mock_30C-28C.kmeans <dbl>,
## #   Pathogen_30C-22C.kmeans <dbl>, Pathogen_30C-28C.kmeans <dbl>,
## #   response.hc <chr>, response_dis.hc <chr>, cluster.hc <int>,
## #   Mock_30C-22C.hc <dbl>, Mock_30C-28C.hc <dbl>, Pathogen_30C-22C.hc <dbl>,
## #   Pathogen_30C-28C.hc <dbl>
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kbl(Comparison_genotypes) %>%
   kable_styling(bootstrap_options = c("striped", "hover"))
Genotype response.kmeans response_dis.kmeans cluster.kmeans Mock_30C-22C.kmeans Mock_30C-28C.kmeans Pathogen_30C-22C.kmeans Pathogen_30C-28C.kmeans response.hc response_dis.hc cluster.hc Mock_30C-22C.hc Mock_30C-28C.hc Pathogen_30C-22C.hc Pathogen_30C-28C.hc
310111 Temperature dependent Disease severity (>50%) 1 2.170000 63.191667 91.27158 95.10185 Temperature dependent Disease severity (>50%) 2 2.170000 63.191667 95.44254 97.51295
311078 Temperature dependent Disease severity (>50%) 1 3.489498 55.183828 49.73357 99.30556 Temperature dependent Disease severity (>50%) 2 2.752993 55.183828 36.98202 88.11342
311677 Temperature dependent Disease severity (>50%) 1 12.370266 41.294279 68.61909 70.87940 Temperature dependent Disease severity (>50%) 2 12.370266 45.704566 64.50405 67.11457
310219 Temperature dependent Disease severity (<50%) 2 6.563795 18.915618 56.07677 65.93675 Temperature independent Disease severity (<50%) 1 12.739706 18.915618 49.99060 62.76437
310301 Temperature dependent Disease severity (<50%) 2 4.959850 14.361942 90.13625 95.69374 Temperature independent Disease severity (>50%) 3 4.959850 13.703063 90.36390 92.86771
310354 Temperature dependent Disease severity (<50%) 2 3.972466 4.416403 57.26400 31.79274 Temperature dependent Disease severity (<50%) 4 4.243452 4.416403 75.27696 28.36789
310645 Temperature dependent Disease severity (<50%) 2 7.788413 5.219974 66.79600 18.79711 Temperature dependent Disease severity (<50%) 4 7.788413 5.219974 68.14012 29.07289
310747 Temperature dependent Disease severity (<50%) 2 9.876596 3.574892 64.17873 60.49265 Temperature dependent Disease severity (<50%) 4 6.725744 3.574892 67.47515 55.12946
310998 Temperature dependent Disease severity (<50%) 2 8.293879 11.428291 73.35599 45.22939 Temperature dependent Disease severity (<50%) 4 8.293879 11.428291 78.72730 42.91322
311600 Temperature dependent Disease severity (<50%) 2 15.348676 11.066336 52.62807 36.31581 Temperature independent Disease severity (<50%) 1 15.348676 11.066336 66.81814 43.88809
311642 Temperature dependent Disease severity (<50%) 2 2.967537 3.961058 78.78229 50.21694 Temperature dependent Disease severity (<50%) 4 2.967537 3.961058 66.23344 57.01817
311735 Temperature dependent Disease severity (<50%) 2 9.601114 13.232224 98.39450 40.00939 Temperature dependent Disease severity (<50%) 4 5.458452 13.232224 86.26606 49.27782
311383 Temperature independent Disease severity (>50%) 3 19.936941 8.062516 87.26605 79.16193 Temperature independent Disease severity (>50%) 3 19.936941 8.062516 91.38036 82.59137
311795 Temperature independent Disease severity (>50%) 3 28.101615 32.611824 61.61891 87.69000 Temperature independent Disease severity (>50%) 3 28.101615 32.611824 59.52604 74.40808
301161 Temperature independent Disease severity (<50%) 4 12.605503 22.912969 23.92674 41.26231 Temperature independent Disease severity (<50%) 1 17.759236 22.912969 24.29358 39.02429
310131 Temperature independent Disease severity (<50%) 4 3.007357 3.817884 43.07445 38.53004 Temperature independent Disease severity (<50%) 1 3.007357 4.097184 37.81234 37.01545
310802 Temperature independent Disease severity (<50%) 4 10.607829 9.185665 35.45273 31.04425 Temperature independent Disease severity (<50%) 1 10.607829 9.185665 43.86360 25.40046
311151 Temperature independent Disease severity (<50%) 4 3.943224 3.912199 41.72423 58.64914 Temperature independent Disease severity (<50%) 1 3.943224 3.912199 38.96177 44.83546
311206 Temperature independent Disease severity (<50%) 4 1.757405 5.397225 44.56139 15.45172 Temperature independent Disease severity (<50%) 1 1.757405 5.397225 44.13627 26.90888
311385 Temperature independent Disease severity (<50%) 4 1.632375 5.476574 23.53724 48.39385 Temperature independent Disease severity (<50%) 1 1.632375 5.476574 28.03003 53.05278
#write_csv(Comparison_genotypes, "Comparison_genotypes.csv")

PCA

This first one is using FactoMiner,

#Need to remove NAs
rice_data_NoNAs <- na.omit(rice_data)

#Creating Matrix for analysis
rice_matrix <- rice_data_NoNAs[,3:6]
row.names(rice_matrix) <- paste0(rice_data_NoNAs$Genotype,"_",rice_data_NoNAs$Rep)
## Warning: Setting row names on a tibble is deprecated.
#PCA
rice_PCA <- prcomp(rice_matrix, center = T, scale. = T)


#Since we have 20 cultivars we need a palette with 20 colors
colors_n <- length(unique(rice_data_NoNAs$Genotype))
getPalette <- colorRampPalette(brewer.pal(9, "Dark2"))
## Warning in brewer.pal(9, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
(PCA_rice <- fviz_pca_biplot(rice_PCA, col.var = "blue",
                label = "var", repel = T,
                habillage = rice_data_NoNAs$Genotype, addEllipses = TRUE, ellipse.level = 0.95,
             ellipse.type ="confidence") +
  scale_color_manual(values = getPalette(colors_n)) +
  scale_shape_manual(values = c(rep(19, 5), rep(16,5), rep(17,5), rep(18,5))))
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.

plotly::ggplotly(PCA_rice)
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
library(viridis)
## Loading required package: viridisLite
#PCA
rice_PCA <- prcomp(rice_matrix, center = T, scale. = T)

#PCA
biplot <- ggbiplot::ggbiplot(rice_PCA, obs.scale = 1, var.scale = 1) +
  geom_text(aes(label=rice_data_NoNAs$Genotype), size = 2, nudge_y = 0.2, alpha=0.5) +
  geom_point(aes(colour=rice_data_NoNAs$`Pathogen_30C-22C`)) +
  scale_color_viridis(name = "Pathogen_30C-22C", option = "D")
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
plotly::ggplotly(biplot)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] scales_1.1.1       plyr_1.8.6         viridis_0.5.1      viridisLite_0.3.0 
##  [5] kableExtra_1.3.1   RColorBrewer_1.1-2 factoextra_1.0.7   FactoMineR_2.4    
##  [9] readxl_1.3.1       forcats_0.5.1      stringr_1.4.0      dplyr_1.0.5       
## [13] purrr_0.3.4        readr_1.4.0        tidyr_1.1.2        tibble_3.1.0      
## [17] ggplot2_3.3.3      tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.5.0             lubridate_1.7.9.2    webshot_0.5.2       
##  [4] httr_1.4.2           ggsci_2.9            ggbiplot_0.55       
##  [7] tools_4.0.4          backports_1.2.1      utf8_1.2.1          
## [10] R6_2.5.0             DT_0.17              lazyeval_0.2.2      
## [13] DBI_1.1.1            colorspace_2.0-0     withr_2.4.1         
## [16] tidyselect_1.1.0     gridExtra_2.3        curl_4.3            
## [19] compiler_4.0.4       cli_2.3.1            rvest_0.3.6         
## [22] flashClust_1.01-2    xml2_1.3.2           plotly_4.9.3        
## [25] labeling_0.4.2       digest_0.6.27        foreign_0.8-81      
## [28] rmarkdown_2.6        rio_0.5.16           pkgconfig_2.0.3     
## [31] htmltools_0.5.1.1    dbplyr_2.1.0         highr_0.8           
## [34] htmlwidgets_1.5.3    rlang_0.4.10         rstudioapi_0.13     
## [37] farver_2.1.0         generics_0.1.0       jsonlite_1.7.2      
## [40] crosstalk_1.1.1      dendextend_1.14.0    zip_2.1.1           
## [43] car_3.0-10           magrittr_2.0.1       leaps_3.1           
## [46] NbClust_3.0          Rcpp_1.0.6           munsell_0.5.0       
## [49] fansi_0.4.2          abind_1.4-7          lifecycle_1.0.0     
## [52] scatterplot3d_0.3-41 stringi_1.5.3        yaml_2.2.1          
## [55] carData_3.0-4        debugme_1.1.0        MASS_7.3-53         
## [58] ggrepel_0.9.1        crayon_1.4.1         lattice_0.20-41     
## [61] haven_2.3.1          hms_1.0.0            knitr_1.31          
## [64] pillar_1.5.1         ggpubr_0.4.0         ggsignif_0.6.0      
## [67] reprex_1.0.0         glue_1.4.2           evaluate_0.14       
## [70] data.table_1.13.6    modelr_0.1.8         vctrs_0.3.7         
## [73] cellranger_1.1.0     gtable_0.3.0         assertthat_0.2.1    
## [76] xfun_0.20            openxlsx_4.2.3       broom_0.7.4         
## [79] rstatix_0.6.0        cluster_2.1.0        ellipsis_0.3.1